EDA

Author

Daniel Chng, Leon Tan & Zoe Chia

Published

February 28, 2023

Modified

March 22, 2023

# Import Packages
pacman::p_load(sf, tmap, tidyverse, sfdep, readxl, Kendall, plotly, plyr)
# Import MPSZ
mpsz <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019")
Reading layer `MPSZ-2019' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
mpsz <- st_transform(mpsz, 3414)
st_crs(mpsz)
Coordinate Reference System:
  User input: EPSG:3414 
  wkt:
PROJCRS["SVY21 / Singapore TM",
    BASEGEOGCRS["SVY21",
        DATUM["SVY21",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4757]],
    CONVERSION["Singapore Transverse Mercator",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Cadastre, engineering survey, topographic mapping."],
        AREA["Singapore - onshore and offshore."],
        BBOX[1.13,103.59,1.47,104.07]],
    ID["EPSG",3414]]
write_rds(mpsz, "data/model/mpsz.rds")
# Import MRT Stations
mrt <- read.csv("data/geospatial/mrtsg.csv")

mrt_sf <- st_as_sf(mrt,
                   coords = c("Longitude",
                              "Latitude"),
                              crs = 4326) %>%
  st_transform(crs = 3414)
write_rds(mrt_sf, "data/model/mrt_sf.rds")
# Tourism
tourism_sf <- st_read(dsn = "data/geospatial", layer = "TOURISM")
Reading layer `TOURISM' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 107 features and 19 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -1.797693e+308 ymin: -1.797693e+308 xmax: 43659.54 ymax: 47596.73
Projected CRS: SVY21
# Assign EPSG Code
tourism_sf <- st_transform(tourism_sf, 3414)
write_rds(tourism_sf, "data/model/tourism_sf.rds")
# Shopping Malls
shopping <- read.csv("data/geospatial/mall_coordinates.csv")

shopping <- shopping %>%
  select(name, latitude, longitude)

glimpse(shopping)
Rows: 199
Columns: 3
$ name      <chr> "100 AM", "i12 Katong", "313@SOMERSET", "321 CLEMENTI", "600…
$ latitude  <dbl> 1.274588, 1.305087, 1.301385, 1.312025, 1.334042, 1.437131, …
$ longitude <dbl> 103.8435, 103.9051, 103.8377, 103.7650, 103.8510, 103.7953, …
shopping_sf <- st_as_sf(shopping,
                              coords = c("longitude",
                                         "latitude"),
                              crs = 4326) %>%
  st_transform(crs = 3414)
write_rds(shopping_sf, "data/model/shopping_sf.rds")
# Hexagon for Map
hexagons <- st_read(dsn = "data/geospatial", layer = "hexagons")
Reading layer `hexagons' from data source 
  `C:\wamp64\www\IS415-Project\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 3125 features and 6 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 21506.33 xmax: 50010.26 ymax: 50256.33
Projected CRS: SVY21 / Singapore TM
hexagons_sf <- st_transform(hexagons, 3414)
write_rds(hexagons_sf, "data/model/hexagons_sf.rds")
# HDB Population by Geographical Location
hdb <- read.csv("data/aspatial/hdb-resident-population-by-geographical-distribution.csv")
# Filter out 2018 data
hdb <- filter(hdb, shs_year == "2018")
write_rds(hdb, "data/model/hdb.rds")
tmap_mode('view')
tmap_options(check.and.fix = TRUE) +
tm_shape(mpsz) +
  tm_polygons() +
tm_shape(mrt_sf) +
  tm_dots(alph=0.5, size=0.01)+
  tm_view(set.zoom.limits = c(11,14))
mpsz_hdb <- left_join(mpsz, hdb, by = c("PLN_AREA_N" = "town_estate"))
# Population Density
tmap_mode('view')
tm_shape(mpsz_hdb) +
  tm_fill("number")
# MRT vs Population Density
tmap_mode('view')
tm_shape(mpsz_hdb) +
  tm_fill("number") +
tm_shape(mrt_sf) +
  tm_dots(alph=0.5, size=0.01)+
  tm_view(set.zoom.limits = c(11,14))
# Population Data 2022
popdata <- read_csv("data/aspatial/respopagesexfa2022.csv")
popdata2022 <- popdata %>%
  spread(AG, Pop) %>%
  mutate(YOUNG = `0_to_4`+`5_to_9`+`10_to_14`+
`15_to_19`+`20_to_24`) %>%
mutate(`ECONOMY ACTIVE` = rowSums(.[9:13])+
rowSums(.[15:17]))%>%
mutate(`AGED`=rowSums(.[18:22])) %>%
mutate(`TOTAL`=rowSums(.[5:22])) %>%  
mutate(`DEPENDENCY` = (`YOUNG` + `AGED`)
/`ECONOMY ACTIVE`) %>%
mutate_at(.vars = vars(PA, SZ), toupper) %>%
select(`PA`, `SZ`, `YOUNG`, `ECONOMY ACTIVE`, `AGED`, 
       `TOTAL`, `DEPENDENCY`) %>%
  filter(`ECONOMY ACTIVE` > 0)
popdata2022 <- popdata2022 %>%
  mutate_at(.vars = vars(PA, SZ), 
          .funs = funs(toupper)) %>%
  filter(`ECONOMY ACTIVE` > 0)
write_rds(popdata2022, "data/model/popdata2022.rds")
mpsz_popdata2022 <- left_join(mpsz, popdata2022, by = c("SUBZONE_N" = "SZ"))
write_rds(mpsz_popdata2022, "data/model/mpsz_popdata2022.rds")
# MRT vs Population Density
tmap_mode('view')
tm_shape(mpsz_popdata2022) +
  tm_fill("TOTAL") +
  tm_borders(lwd = 0.1,  alpha = 1) +
tm_shape(mrt_sf) +
  tm_dots(alph=0.5, size=0.01)+
  tm_view(set.zoom.limits = c(11,14))